#!/usr/bin/env ruby
# coding: utf-8

require 'find'

# Tag names shared by several event classes. `Event#valid?` subtracts
# these from an event class's additional tags before checking the tags
# on that class's counters.
BYTE_SIZE_COUNT = %w[byte_size count]

# Conventions for the standard event classes, keyed by the suffix of the
# event name:
#
#   SUFFIX => [MESSAGE, COUNTERS, ADDITIONAL_TAGS]
#
# MESSAGE is the required log message text, COUNTERS are the base names
# that get wrapped as `component_<name>_total`, and ADDITIONAL_TAGS are
# the parameters every log of the event must carry.
EVENT_CLASSES = {
  'BytesReceived' => ['Bytes received.', %w[received_bytes], %w[byte_size protocol]],
  'EventsReceived' => ['Events received.', %w[received_events received_event_bytes], %w[count byte_size]],
  'EventsSent' => ['Events sent.', %w[sent_events sent_event_bytes], %w[count byte_size]],
  'BytesSent' => ['Bytes sent.', %w[sent_bytes], %w[byte_size protocol]],
}

# Appends `item` to the array stored under `key` in `hash`, starting a new
# array when the key is not present yet. The hash's own default value or
# default block is not consulted. Returns the array now stored under `key`.
def hash_array_add(hash, key, item)
  hash[key] = hash.fetch(key) { [] }.push(item)
end

# Tells whether `name` is written as a constant: either a double-quoted
# string literal, or a `::`-qualified path whose final segment consists
# only of upper-case letters, digits, and underscores (for example
# `error_type::PARSER_FAILED`).
#
# The final segment may be any length; previously the pattern lacked a
# quantifier and only accepted single-character names. The pattern is
# anchored to the whole string rather than to a single line.
#
# Returns true or false.
def is_constant?(name)
  quoted = name.start_with?('"') && name.end_with?('"')
  quoted || name.match?(/\A(.+::)[A-Z0-9_]+\z/)
end

# Holds everything collected about one internal event (its struct members,
# the metrics it emits, the logs it writes, and how often it is used) plus
# the error reports produced when the event is validated.
class Event
  # Counters whose tags are cross-checked against error logs.
  ERROR_COUNTERS = ['component_errors_total', 'component_discarded_events_total'].freeze

  # Parameters/tags every error log and `component_errors_total` must carry.
  REQUIRED_ERROR_TAGS = ['error_type', 'stage'].freeze

  # Error-counter tag name => prefix (before `::`) its value must start with.
  ERROR_TAG_NAMESPACES = { 'stage' => 'error_stage', 'error_type' => 'error_type' }.freeze

  attr_accessor :path, :uses, :impl_internal_event
  attr_reader :name, :reports
  attr_writer :members

  def initialize(name)
    @path = nil
    @name = name
    @reports = []
    @members = {}
    @counters = {}
    @metrics = {}
    @logs = []
    @uses = 0
    @impl_internal_event = false
  end

  # Records a metric of the given `type` and `name` with its `tags` (a hash
  # of tag name => value expression). Counters are additionally indexed by
  # bare name so the validation checks can look them up.
  def add_metric(type, name, tags)
    @metrics["#{type}:#{name}"] = tags
    @counters[name] = tags if type == 'counter'
  end

  # Records a log statement: its level (`type`), message text, and the list
  # of parameter names passed to it.
  def add_log(type, message, parameters)
    @logs.append([type, message, parameters])
  end

  # The event signature is used to check for duplicates and is
  # composed from the member names and their types, the metric types,
  # names, and their tags, and the log messages and parameters. If no
  # metrics and no logs are defined for the event, the signature is
  # `nil` to skip duplicate checking.
  def signature
    return nil if @metrics.empty? && @logs.empty?

    members = @members.map { |name, type| "#{name}:#{type}" }.sort.join(':')
    metrics = @metrics.map { |name, tags| "#{name}(#{tags.keys.sort.join(',')})" }.sort.join(';')
    logs = @logs.sort.join(';')
    "#{members}[#{logs}][#{metrics}]"
  end

  # Runs every check against the event, replacing the contents of `reports`
  # with one message per violation found. Returns true when there are none.
  def valid?
    @reports.clear

    append('Event has no uses.') if @uses == 0
    check_event_class_conventions
    check_error_event
    check_error_log_parameters
    check_error_counter_tags

    @reports.empty?
  end

  private

    def append(report)
      @reports.append(report)
    end

    # Applies the EVENT_CLASSES conventions when the event name ends with
    # one of the known suffixes: every log must be a trace with the
    # required message and tags, and the matching counters must exist.
    def check_event_class_conventions
      EVENT_CLASSES.each do |suffix, (required_message, counters, additional_tags)|
        next unless @name.end_with?(suffix)

        @logs.each do |type, message, parameters|
          append('Log type MUST be "trace!".') unless type == 'trace'
          unless message == required_message
            append("Log message MUST be \"#{required_message}\" (is \"#{message}\").")
          end
          additional_tags.each do |tag_name|
            append("Log MUST contain tag \"#{tag_name}\"") unless parameters.include?(tag_name)
          end
        end

        counters.each do |counter|
          counters_must_include("component_#{counter}_total", additional_tags - BYTE_SIZE_COUNT)
        end
      end
    end

    # Make sure Error events output an error. An event counts as an error
    # event when it logs at error level at least once (any number of error
    # logs, not exactly one) or when its name ends in "Error".
    def check_error_event
      has_errors = @logs.any? { |type, _message, _parameters| type == 'error' }
      named_error = @name.end_with?('Error')
      return unless has_errors || named_error

      append('Error events MUST be named "___Error".') unless named_error
      logs_should_have('error')
      counters_must_include('component_errors_total', REQUIRED_ERROR_TAGS)
    end

    # Make sure error logs contain the right parameters, and that any of
    # those parameters is mirrored as a tag on the error counters the
    # event increments.
    def check_error_log_parameters
      @logs.each do |type, _message, parameters|
        next unless type == 'error'

        REQUIRED_ERROR_TAGS.each do |parameter|
          append("Error log MUST include parameter \"#{parameter}\".") unless parameters.include?(parameter)
        end

        ERROR_COUNTERS.each do |counter_name|
          tags = @counters[counter_name]
          next unless tags

          ['error_code', 'error_type', 'stage'].each do |parameter|
            if parameters.include?(parameter) && !tags.include?(parameter)
              append("Counter \"#{counter_name}\" must include \"#{parameter}\" to match error log.")
            end
          end
        end
      end
    end

    # Only component_errors_total and component_discarded_events_total are
    # considered: their `stage` and `error_type` tag values must start
    # with the `error_stage::` / `error_type::` prefix respectively.
    def check_error_counter_tags
      @counters.each do |name, tags|
        next unless ERROR_COUNTERS.include?(name)

        tags.each do |tag, value|
          namespace = ERROR_TAG_NAMESPACES[tag]
          next if namespace.nil? || value.start_with?("#{namespace}::")

          append("Counter \"#{name}\" tag \"#{tag}\" value must be an \"#{namespace}\" constant.")
        end
      end
    end

    # Reports a violation unless at least one log uses the given level.
    def logs_should_have(level)
      return if @logs.any? { |type, _message, _parameters| type == level }

      append("This event MUST log with level #{level}.")
    end

    # Reports a violation when counter `name` is never incremented, or when
    # it is but lacks one of the `required_tags`.
    def counters_must_include(name, required_tags)
      unless @counters.key?(name)
        append("This event MUST increment counter \"#{name}\".")
        return
      end

      tags = @counters[name]
      required_tags.each do |tag|
        append("Counter \"#{name}\" MUST include tag \"#{tag}\".") unless tags.include?(tag)
      end
    end
end

# Registry of every event seen so far, keyed by event name. Looking up a
# name that is not present yet creates and stores a fresh `Event` for it.
$all_events = Hash::new { |hash, key| hash[key] = Event::new(key) }

# Running total of problems reported by this script.
error_count = 0

# Scan sources and build internal structures
Find.find('.') do |path|
  path = path[2..] if path.start_with? './'

  # Only regular Rust source files are read; the `File.file?` guard keeps a
  # directory whose name happens to end in `.rs` from crashing `File.read`.
  next unless path.end_with?('.rs') && File.file?(path)

  text = File.read(path)

  # Count every place an event is emitted.
  text.scan(/\b(?:emit!|internal_event::emit)\((?:[a-z][a-z0-9_:]+)?([A-Z][A-Za-z0-9]+)/) do |event_name,|
    $all_events[event_name].uses += 1
  end

  # Check log message texts for correct formatting. See below for the
  # full regex
  if path.start_with? 'src/'
    text.scan(/(trace|debug|info|warn|error)!\(\s*(message\s*=\s*)?"([^({)][^("]+)"/) do |_type, _has_message_prefix, message|
      reports = []
      reports.append('Message must start with a capital.') unless message.match(/^[[:upper:]]/)
      reports.append('Message must end with a period.') unless message.match(/\.$/)
      unless reports.empty?
        # Name the file and the offending message so the report is actionable.
        puts "#{path}: Errors in message \"#{message}\":"
        reports.each { |report| puts "  #{report}" }
        error_count += 1
      end
    end
  end

  next unless path.start_with?('src/internal_events/') ||
              path.start_with?('lib/vector-common/src/internal_event/')

  # Scan internal event structs for member names
  text.scan(/[\n ]struct (\S+?)(?:<.+?>)?(?: {\n(.+?)\n\s*}|;)\n/m) do |struct_name, members|
    $all_events[struct_name].path = path
    # `members` is nil for unit structs (`struct Name;`).
    if members
      $all_events[struct_name].members = members.scan(/ ([A-Za-z0-9_]+): +(.+?),/).to_h
    end
  end

  # Scan internal event implementation blocks for logs and metrics
  text.scan(/^(\s*)impl(?:<.+?>)? InternalEvent for ([A-Za-z0-9_]+)(?:<.+?>)? {\n(.+?)\n\1}$/m) do |_space, event_name, block|
    $all_events[event_name].impl_internal_event = true

    # Scan for counter names and tags
    block.scan(/ (counter|gauge|histogram)!\((?:\n\s+)?"([^"]+)",(.+?)\)[;\n]/m) do |type, name, tags|
      tags = tags.scan(/"([^"]+)" => (.+?)(?:,|$)/).to_h
      $all_events[event_name].add_metric(type, name, tags)
    end

    # Scan for log outputs and their parameters
    block.scan(/
                (trace|debug|info|warn|error)! # The log type
                \(\s*(?:message\s*=\s*)? # Skip any leading "message =" bit
                (?:"([^({)][^("]+)"|([^,]+)) # The log message text
                ([^;]*?) # Match the parameter list
                \)(?:;|\n\s*}) # Normally would end with simply ");", but some are missing the semicolon
               /mx) do |type, raw_message, var_message, parameters|
      # A parameter is either `name = value` or a `?name` / `%name` shorthand.
      parameters = parameters.scan(/([a-z0-9_]+) *= .|[?%]([a-z0-9_.]+)/) \
                     .map { |assignment, simple| assignment || simple }

      # The message is either a string literal or some other expression.
      message = raw_message || var_message

      $all_events[event_name].add_log(type, message, parameters)
    end
  end
end

# Event names grouped by signature; a group with more than one name is a
# set of duplicate events.
$duplicates = Hash::new { |hash, key| hash[key] = [] }

# Validate every event that implements InternalEvent and record its
# signature for the duplicate check.
$all_events.each do |name, event|
  next unless event.impl_internal_event

  signature = event.signature
  $duplicates[signature].append(name) if signature

  next if event.valid?

  puts "#{event.path}: Errors in event #{event.name}:"
  event.reports.each { |report| puts "    #{report}" }
  error_count += 1
end

# Report each group of events sharing one signature.
$duplicates.each_value do |dupes|
  next unless dupes.length > 1

  puts "Duplicate events detected: #{dupes.join(', ')}"
  error_count += 1
end

# Print the total and fail the run when anything was reported.
puts "#{error_count} error(s)"
exit 1 if error_count > 0
